Read the data

# Load the prepared production table of each region, tag every row with its
# region code in `var`, and coerce the production column to double.
dataPrep_rdd <- mutate(
  read.csv("../data/dataPrep_production_rdd.csv"),
  var = "rdd",
  Wine_mhl = as.double(Wine_mhl)
)

# NOTE(review): DOY_Fs is set to 0 for every rvv row. Presumably this is only a
# placeholder so both tables have the same columns for rbind(); 0 is not an
# observed day of year -- confirm that downstream code never treats it as data.
dataPrep_rvv <- mutate(
  read.csv("../data/dataPrep_production_rvv.csv"),
  var = "rvv",
  DOY_Fs = 0,
  Wine_mhl = as.double(Wine_mhl)
)

# Stack both regions into a single table.
df_prd <- rbind(dataPrep_rdd, dataPrep_rvv)


# Echo the combined table.
df_prd

# Yearly production series per region, starting at the earliest year found in
# the region's table. ts() takes the values in row order, so this assumes the
# rows are sorted by Year with one row per year -- TODO confirm.
ts.rdd <- ts(
  data = dataPrep_rdd$Wine_mhl,
  start = min(dataPrep_rdd$Year),
  frequency = 1
)
ts.rvv <- ts(
  data = dataPrep_rvv$Wine_mhl,
  start = min(dataPrep_rvv$Year),
  frequency = 1
)

str(ts.rdd)
##  Time-Series [1:90] from 1933 to 2022: 753 836 657 523 870 ...
str(ts.rvv)
##  Time-Series [1:82] from 1941 to 2022: 1295 1061 3008 3070 2205 ...

EDA Production Data

# One data frame per region: the series' time index (`Date`) next to its values.
df1 <- data.frame(
  Date = time(ts.rdd),
  Value1 = ts.rdd
)
df2 <- data.frame(
  Date = time(ts.rvv),
  Value2 = ts.rvv
)

# Keep every RDD year and attach the RVV value of the same year where one
# exists (years without an RVV row get NA in Value2).
df_joined <- df1 %>%
  left_join(df2, by = "Date")


# Plot wine production for the two regions on a shared, zero-based y axis.
ggplot(data = df_joined, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = Value1, color = "Rdd")) +
  geom_line(mapping = aes(y = Value2, color = "Rvv")) +
  scale_color_manual(values = c(Rdd = "#00AFBB", Rvv = "#E7B800")) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  labs(
    title = "Wine production from RDD and RVV regions",
    subtitle = 'values in Mhl',
    x = "Date",
    y = "Value"
  ) +
  theme_bw()

# RVV production only, with a vertical marker at the regulation year (1986).
ggplot(data = df_joined, mapping = aes(x = Date)) +
  geom_line(mapping = aes(y = Value2, color = "Rvv")) +
  geom_vline(xintercept = 1986) +
  scale_color_manual(values = c(Rvv = "#E7B800")) +
  scale_y_continuous(limits = c(0, NA), expand = c(0, 0)) +
  labs(
    title = "Wine production from  RVV regions",
    subtitle = 'values in Mhl',
    x = "Date",
    y = "Value"
  ) +
  theme_bw()

# Interactive line chart of both regions. The y axis runs from 0 up to the
# largest value observed in either series.
plot_ly(
  df_joined,
  x = ~Date,
  y = ~Value1,
  type = "scatter",
  mode = "lines",
  name = "Rdd"
) %>%
  add_trace(y = ~Value2, name = "Rvv") %>%
  layout(
    title = "Wine production from RDD and RVV regions",
    xaxis = list(title = "Date"),
    yaxis = list(
      title = "Production Mhl",
      range = c(0, max(df_joined[, 2:3], na.rm = TRUE))
    )
  )

Rdd region

## rdd plots  ----------------------

# RDD production series with a smoothed trend line; the y axis is forced to
# include 0. The title must be passed as `title =`: an unnamed string given to
# labs() is not used as any label, so the plot was rendered without a title.
autoplot(ts.rdd) +
  geom_smooth() +
  expand_limits(y = 0) +
  labs(
    title = "Wine production for RDD region in Mhl",
    y = "Wine Production in Mhl (in thousands)",
    x = NULL
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# `x` is still assigned so that any later code reading it keeps working.
x <- ts.rdd

# # lag plot of the series
# gglagplot(x)

# ACF plot of the RDD series (up to 34 yearly lags). The series is passed by
# its own name because ggAcf() titles the plot after the expression it is
# given; passing the generic `x` labelled both regions' plots "Series: x".
ggAcf(ts.rdd, lag.max = 34)

RVV

# RVV production series with a smoothed trend line; the y axis is forced to
# include 0. The title must be passed as `title =`: an unnamed string given to
# labs() is not used as any label, so the plot was rendered without a title.
autoplot(ts.rvv) +
  geom_smooth() +
  expand_limits(y = 0) +
  labs(
    title = "Wine production for RVV region in Mhl",
    y = "Wine Production in Mhl (in thousands)",
    x = NULL
  )
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 1 rows containing non-finite values (stat_smooth).

# `x` is still assigned so that any later code reading it keeps working.
x <- ts.rvv

# # lag plot of the series
# gglagplot(x)

# ACF plot of the RVV series (up to 34 yearly lags). The series is passed by
# its own name because ggAcf() titles the plot after the expression it is
# given; passing the generic `x` labelled both regions' plots "Series: x".
ggAcf(ts.rvv, lag.max = 34)

Stationarity

To check if a time series is stationary, we can perform a statistical test, such as the Augmented Dickey-Fuller (ADF) test or the Kwiatkowski-Phillips-Schmidt-Shin (KPSS) test.

The ADF test checks for the presence of a unit root in the time series. Its null hypothesis is that the time series has a unit root (i.e., it is non-stationary), and its alternative hypothesis is that the time series is stationary. If the p-value is less than the significance level (usually 0.05), we reject the null hypothesis of a unit root and conclude that the time series is stationary; otherwise we cannot rule out non-stationarity.

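The KPSS test works the other way round: its null hypothesis is that the time series is stationary, either around a constant level (level stationarity) or around a deterministic trend (trend stationarity), and its alternative hypothesis is that the series has a unit root. A p-value below the significance level therefore leads us to reject stationarity. Below we use the trend-stationarity variant.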

Tests for RDD

# Perform the ADF test on the RDD series.
# Null hypothesis: the series has a unit root; the alternative (printed in the
# output below) is "stationary".
adf.test(ts.rdd)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts.rdd
## Dickey-Fuller = -3.2485, Lag order = 4, p-value = 0.0849
## alternative hypothesis: stationary
# Reading: p = 0.0849 is above 0.05, so the unit-root null is not rejected at
# the 5% level.

# Perform the KPSS test on the RDD series.
# null = 'Trend' selects the trend-stationarity variant (see the test title in
# the output); here stationarity is the null hypothesis, the reverse of ADF.
kpss.test(ts.rdd, null = 'Trend')
## 
##  KPSS Test for Trend Stationarity
## 
## data:  ts.rdd
## KPSS Trend = 0.14496, Truncation lag parameter = 3, p-value = 0.05193
# Reading: p = 0.05193 is just above 0.05, so trend stationarity is not
# rejected at the 5% level, but only narrowly.

Tests for RVV

# Perform the ADF test on the RVV series.
# Null hypothesis: the series has a unit root; the alternative (printed in the
# output below) is "stationary".
# NOTE(review): na.exclude() removes the missing observation(s) before the test.
# If a missing year lies inside the series, the remaining values are tested as
# if they were consecutive -- confirm that this is intended.
adf.test(na.exclude(ts.rvv))
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.exclude(ts.rvv)
## Dickey-Fuller = -2.1809, Lag order = 4, p-value = 0.5017
## alternative hypothesis: stationary
# Reading: p = 0.5017, so the unit-root null is not rejected.

# Perform the KPSS test on the RVV series.
# null = 'Trend' selects the trend-stationarity variant (see the test title in
# the output); here stationarity is the null hypothesis, the reverse of ADF.
kpss.test(na.exclude(ts.rvv), null = 'Trend')
## Warning in kpss.test(na.exclude(ts.rvv), null = "Trend"): p-value smaller than
## printed p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  na.exclude(ts.rvv)
## KPSS Trend = 0.29441, Truncation lag parameter = 3, p-value = 0.01
# Reading: the warning says the true p-value is below the printed 0.01, so
# trend stationarity is rejected. Both tests point to a non-stationary series.

EDA

Check the day of the year (DOY) on which each event occurs

# https://r-graph-gallery.com/density_mirror_ggplot2.html

# Median day of year of each event: one row per event, one column per region.
# pivot_longer()/pivot_wider() replace the superseded gather()/spread(); the
# resulting table is the same.
df_prd %>%
  select(var, DOY_BB, DOY_Fl, DOY_sM, DOY_Hv) %>%
  pivot_longer(cols = -var, names_to = "key", values_to = "value") %>%
  group_by(var, key) %>%
  summarise(median = median(value), .groups = "drop") %>%
  pivot_wider(names_from = var, values_from = median) %>%
  rename(
    median_rdd = rdd,
    median_rvv = rvv
  ) %>%
  kable() %>%
  kableExtra::kable_minimal()
key median_rdd median_rvv
DOY_BB 87.5 88.5
DOY_Fl 135.0 154.0
DOY_Hv 257.0 267.5
DOY_sM 179.0 225.5
  #add_header_above(c(" ", "Hello" = 2, "World" = 2))


# Mirrored density plot of the day of year of each event, one panel per event:
# rdd is drawn above the axis and rvv below it.
# Reshape to one row per Year/event with one value column per region.
# Wine_mhl is not an event, and DOY_Fs is dropped as well.
df_prd %>%
  select(-Wine_mhl, -DOY_Fs) %>%
  rename(Region = var) %>%
  pivot_longer(
    cols = -c(Region, Year),
    names_to = "variable",
    values_to = "value"
  ) %>%
  pivot_wider(names_from = Region, values_from = value) %>%
  ggplot() +

  # Top half: rdd. annotate() draws the region tag once per panel, whereas
  # geom_label(aes(<constants>)) drew one overlapping copy per data row.
  geom_density(aes(x = rdd, y = after_stat(density)), fill = "#69b3a2") +
  annotate("label", x = 40, y = 0.01, label = "rdd", colour = "#69b3a2") +

  # Bottom half: rvv, with the density negated so it is mirrored below zero.
  geom_density(aes(x = rvv, y = after_stat(-density)), fill = "#404080") +
  annotate("label", x = 320, y = -0.01, label = "rvv", colour = "#404080") +

  theme_ipsum() +
  theme(
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8)
  ) +
  xlab("") +
  labs(title = 'Distribution of days of year for several events') +
  facet_wrap(~variable)
## Warning: Removed 32 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database

## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## RDD variables  ----------------------

# df_prd %>%
#  # filter(var == 'rdd' ) %>% 
#   select(-Year, -Wine_mhl) %>% 
#   gather(key="region", value="value") %>%
#   mutate(value = round(as.numeric(value),0)) %>% 
#   mutate_all(~ifelse(is.na(.), 0, .)) %>% 
#   
#   mutate(region = fct_reorder(region, value)) %>%
#   ggplot( aes(color=region, fill=region)) +
#   
#   ## histogram  ----------------------
#   
#   # geom_histogram(alpha=0.6, binwidth = 5) +
# 
#   ## densitys  ----------------------
#     
#   geom_density( aes(x = value, y = ..density..) ) +
# 
#     scale_fill_viridis(discrete=TRUE) +
#     scale_color_viridis(discrete=TRUE) +
# 
# 
#     theme_ipsum() +
#     theme(
#       legend.position="none",
#       panel.spacing = unit(0.1, "lines"),
#       strip.text.x = element_text(size = 8)
#     ) +
#     xlab("") +
#     ylab("Assigned Probability (%)") +
#     labs(title = 'RDD days of the year distributions for events') +
#     facet_wrap(~region)

Day of the Year for the Bud Break – DOY_BB

# Density plot of the bud-break day of year (plot_densitys() is defined
# outside this section).
plot_densitys(
  df_prd = df_prd,
  x = "DOY_BB",
  x_label = "Bud Break"
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Day of the Year for Flowering – DOY_Fl

# Density plot of the flowering day of year (plot_densitys() is defined
# outside this section).
plot_densitys(
  df_prd = df_prd,
  x = "DOY_Fl",
  x_label = "Flowering"
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Day of the Year for Start of the maturity – DOY_sM

# Density plot of the start-of-maturity day of year (plot_densitys() is
# defined outside this section).
plot_densitys(
  df_prd = df_prd,
  x = "DOY_sM",
  x_label = "Start of the maturity"
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Day of the Year for Fruit Set – DOY_Fs

Data for the RVV is not available for Fruit Set

# Density plot of the fruit-set day of year (plot_densitys() is defined
# outside this section).
# NOTE(review): DOY_Fs is filled with 0 for every rvv row when the data is
# read, so anything drawn for rvv here reflects that placeholder rather than
# observations.
plot_densitys(
  df_prd = df_prd,
  x = "DOY_Fs",
  x_label = "Fruit Set"
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

Day of the Year for the Harvest – DOY_Hv

Harvest data is available for both regions (only the Fruit Set data is not available for the RVV).

# Density plot of the harvest day of year (plot_densitys() is defined outside
# this section).
plot_densitys(
  df_prd = df_prd,
  x = "DOY_Hv",
  x_label = "Harvest"
)
## Warning: Removed 8 rows containing non-finite values (stat_density).
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database